from bs4 import BeautifulSoup #导入BeautifulSoup库
import requests #导入requests库，用来读取在线网页数据的

### channel_extract: extract the list of second-hand-goods channel URLs from 58.com (Hefei)
##  Source page: http://hf.58.com/sale.shtml
##  Date: 2017-10-17

start_url = "http://hf.58.com/sale.shtml" # Second-hand goods index page — the scrape entry point
head_url = "http://hf.58.com"  # Site root; prepended to relative hrefs to build absolute channel URLs

def get_channel_urls(url):
    """Fetch *url* and print every channel link found in the side menu.

    Each relative ``href`` from the ``#ymenu-side`` category menu is turned
    into an absolute URL by prefixing the module-level ``head_url``.

    Parameters
    ----------
    url : str
        Page to scrape (e.g. the second-hand goods index page).

    Returns
    -------
    list[str]
        The absolute channel URLs, in page order (also printed, one per
        line, preserving the original script's behavior).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    """
    # Timeout prevents the script from hanging forever on a dead server.
    web_data = requests.get(url, timeout=10)
    web_data.raise_for_status()  # fail loudly instead of parsing an error page
    soup = BeautifulSoup(web_data.text, 'lxml')
    links = soup.select("#ymenu-side > ul > li.ym-tab > span.dlb > a")
    channel_urls = []
    for link in links:
        href = link.get('href')  # hoisted: avoid calling .get() twice
        if href is not None:
            full_url = head_url + href  # build the absolute link
            print(full_url)
            channel_urls.append(full_url)
    return channel_urls

# One-off run to (re)generate the channel list below; intentionally disabled:
# get_channel_urls(start_url)

## Channel links captured from a previous get_channel_urls() run, kept
## inline so the crawler does not re-scrape the index page on every start.
## NOTE: channel_list is deliberately left as a raw triple-quoted string
## for backward compatibility with any code that calls .split() on it.
channel_list = '''
http://hf.58.com/yishu/
http://hf.58.com/tushu/
http://hf.58.com/bangong/
http://hf.58.com/chengren/
'''

# Parsed form: an actual list of URLs, as the comment originally promised.
# str.split() with no arguments drops the surrounding blank lines.
channel_urls = channel_list.split()